library(tidyr)
library(dplyr)
library(ggplot2)
# Replace each raw score in `eval_valuation` with an ordered bucket label.
# Exact 0 and exact 1 get their own buckets; the open interval between them is
# split into thirds. Anything matching no branch (including NA) becomes NA.
data_grouped <- data %>%
  mutate(
    eval_valuation = factor(
      case_when(
        eval_valuation == 0 ~ "0.0",
        eval_valuation > 0 & eval_valuation <= 1/3 ~ "(0, 1/3]",
        eval_valuation > 1/3 & eval_valuation <= 2/3 ~ "(1/3, 2/3]",
        eval_valuation > 2/3 & eval_valuation < 1 ~ "(2/3, 1)",
        eval_valuation == 1 ~ "1.0",
        .default = NA
      ),
      # Levels listed from lowest to highest score so the factor sorts sensibly.
      levels = c("0.0", "(0, 1/3]", "(1/3, 2/3]", "(2/3, 1)", "1.0"),
      ordered = TRUE
    )
  )
# Count rows per (module, agent_name, score bucket).
# `.groups = "drop"` makes summarize() return an ungrouped tibble directly, so
# it no longer emits the "grouped output by ..." message and no separate
# ungroup() step is required.
data_grouped <- data_grouped %>%
  group_by(module, agent_name, eval_valuation) %>%
  summarize(count = n(), .groups = "drop") %>%
  # Add explicit zero-count rows for combinations that never occurred, so
  # every module/agent pair has a row for each score bucket.
  complete(module, agent_name, eval_valuation, fill = list(count = 0)) %>%
  arrange(agent_name, module)
print(data_grouped)
## # A tibble: 40 × 4
## module agent_name eval_valuation count
## <chr> <chr> <ord> <int>
## 1 Gene alias Dummy Agent GPT35 0.0 45
## 2 Gene alias Dummy Agent GPT35 (0, 1/3] 0
## 3 Gene alias Dummy Agent GPT35 (1/3, 2/3] 0
## 4 Gene alias Dummy Agent GPT35 (2/3, 1) 0
## 5 Gene alias Dummy Agent GPT35 1.0 5
## 6 Gene disease association Dummy Agent GPT35 0.0 24
## 7 Gene disease association Dummy Agent GPT35 (0, 1/3] 2
## 8 Gene disease association Dummy Agent GPT35 (1/3, 2/3] 4
## 9 Gene disease association Dummy Agent GPT35 (2/3, 1) 1
## 10 Gene disease association Dummy Agent GPT35 1.0 19
## # ℹ 30 more rows
# data_wide <- pivot_wider(data %>% select(-filename, -agent_answer), names_from = "agent_name", values_from = c(eval_valuation, agent_answer_num_function_calls))
# print(head(data_wide))
# Bar chart of question counts per score bucket, one panel per
# module (rows) x agent (columns).
ggplot(
  data_grouped,
  aes(x = eval_valuation, y = count, fill = eval_valuation)
) +
  # Counts are already aggregated, so plot them as-is.
  geom_col() +
  facet_grid(module ~ agent_name) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    title = "Score distributions by task and agent type",
    x = "Score Bucket",
    y = "Count of Questions"
  ) +
  # The fill colour duplicates the x axis, so its legend is hidden.
  guides(fill = "none") +
  # theme_minimal() must come before theme() so the axis-text tweak survives.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## Warning in geom_hline(aes(yintercept = mean(eval_valuation), x = agent_name)):
## Ignoring unknown aesthetics: x
## Warning in geom_point(aes(alpha = 0.6, customdata =
## "https://github.com/monarch-initiative/oai-plugin-evals/blob/main/results/" %+%
## : Ignoring unknown aesthetics: customdata and text